library('plyr')
library('dplyr')
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('ggplot2')
## Warning: package 'ggplot2' was built under R version 3.6.3
library('stringr')
library('knitr')
library('reshape2')
library('data.table')
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
dt.HiC <- fread("pairs_statistics_oct2018.csv")
Set RegEx patterns for directory searches for cell level data and spot data in all colors.
pat.p <- "^Pairs.txt$"
pat.s <- "^Spots.txt$"
pat.list <- list(p=pat.p, s=pat.s)
Recursively search the ObjectLevelData directory and its subdirectories for files whose name includes the RegEx patterns defined two chunks above. The path.list functon outputs absolute file names. path.list is a list containing all the filenames on a per cell-level or per color-level (i.e. gr, gf or fr) basis.
directory.search <- function(x){
pat <- dir(pattern = x, full.names = TRUE, recursive = TRUE, include.dirs = TRUE)
}
path.list <- llply(pat.list, directory.search)
Extract file names from absolut path and set them as list element names.
trim.names <- function(x){
names(x) <- basename(x) # This assigns the filename to the file that it is read
y <- x ## This is necessary because of scoping issues
}
path.list <- llply(path.list, trim.names)
Recursively read and merge object level data files as data.frames. Rows are labeled with relative filenames (The .id variable). This and the previous chunks are slightly modified tricks adopted from H. Wickam “Tidy Data” paper.
read.merge <- function(x){
dt <-as.data.table(ldply(x, fread))
}
dt.list <- llply(path.list, read.merge)
Second, read in bioinformatic data from ENCODE and 4DN and collate it into a table on a by-probe basis:
dt.bed = fread("Bioinformatics_Data/Probes_LADDist.txt")
dt.ENCODE = fread("Bioinformatics_Data/ENCODE_BJ_Compiled_out.txt")
dt.4DN = fread("Bioinformatics_Data/4DN_HFF_Compiled_out.txt", fill=TRUE)
dt.4DN.average <- dt.4DN[,list(LaminB1 = median(ct_LaminB1DamIDHFF_7256.value),
Compartment = median(ct_HFFCompartments_7382.valueAverage)),
by=list(locus_ID)]
dt.ENCODE.average <- dt.ENCODE[,list(H3K27me3 = median(hub_2441861_b0ad8a57225d5efca1ab752ff35f7ae0.value),
H3K36me3 = median(hub_2441861_defe32d687110f3d9e3c86fbb0a33dda.valueAverage),
H3K4me3 = median(hub_2441861_e1c84294df1470450fe1c133ebccda94.valueAverage)),
by=list(locus_ID)]
dt.bed[,V6:=NULL]
dt.bed <- dt.bed[complete.cases(dt.bed),]
dt.bed[, locus_ID := paste0(c(chr, ":", start+1, "-", end), collapse=""), by=list(chr, start, end, probe)]
dt.bed[, Genome_Position := (start+end)/2]
setkey(dt.bed, locus_ID)
setkey(dt.4DN, locus_ID)
setkey(dt.4DN.average, locus_ID)
setkey(dt.ENCODE, locus_ID)
setkey(dt.ENCODE.average, locus_ID)
dt.sequencing <- dt.4DN.average[dt.ENCODE.average,]
dt.sequencing <- dt.sequencing[dt.bed,]
Separate the cell level data from the distance and spot level data.
dt.spots <- dt.list$s
dt.all <- dt.list$p
dt.all[SpotDist.micron > 1,SpotDist.Threshold := "A (none)"]
dt.all[SpotDist.micron<=1, SpotDist.Threshold := "B (1um)"]
dt.all[SpotDist.micron<=0.35, SpotDist.Threshold := "C (350nm)"]
# Also add nudged distance, to make log plots on the basis of distance more sensible if necessary (check minimum value, see if it's nonzero first)
dt.all[,SpotDist.nudged := SpotDist.micron + 0.0001]
dt.all[,`:=`(Probe1PercDiploid = (PercDiploidGreen*(Channel1=="Green")) +
(PercDiploidRed*(Channel1=="Red") +
(PercDiploidFarRed*(Channel1=="Far Red"))),
Probe2PercDiploid = (PercDiploidGreen*(Channel2=="Green")) +
(PercDiploidRed*(Channel2=="Red") +
(PercDiploidFarRed*(Channel2=="Far Red"))))]
dt.spots[,`:=`(PercDiploid = (PercDiploidGreen*(color=="Green")) +
(PercDiploidRed*(color=="Red") +
(PercDiploidFarRed*(color=="Far Red"))))]
QC_thresh <- 0.6
dt.all[Probe1 == 88, Probe1 := 89]
dt.all[Probe2 == 88, Probe2 := 89]
dt.spots[,Probe := (Green_Probe*(color=="Green") +
Red_Probe*(color=="Red") +
FarRed_Probe*(color=="Far Red"))]
dt.spots[Probe == 88, Probe := 89]
dt.all[,`:=`(Probe1.HiC = ceiling(Probe1),
Probe2.HiC = ceiling(Probe2))]
dt.all[, Probe_Pair := interaction(Probe1, Probe2)]
setkey(dt.all, chr, Probe1.HiC, Probe2.HiC)
setkey(dt.HiC, chr, TargetBaitID1, TargetBaitID2)
dt.all.HiC <- dt.all[dt.HiC,]
dt.all[,chr_formatted := paste0(c("chr", chr), collapse=""), by=list(chr, Probe1)]
setkey(dt.all, chr_formatted, Probe1)
setkey(dt.bed, chr, probe)
dt.all <- dt.all[dt.bed[,c("chr", "probe", "Genome_Position")]]
dt.all <- rename(dt.all, c(GP_1 = "Genome_Position"))
setkey(dt.all, chr_formatted, Probe2)
dt.all <- dt.all[dt.bed[,c("chr", "probe", "Genome_Position")]]
dt.all <- rename(dt.all, c(GP_2 = 'Genome_Position'))
dt.all[, Genomic_Distance := abs(GP_2 - GP_1)]
# very low representation in shell five because dilated nucleus + skinny shell, so lump these in with shell 4.
dt.all[AreaShell.spot1.COG == 5, AreaShell.spot1.COG:=4]
dt.all[AreaShell.spot2.COG == 5, AreaShell.spot2.COG:=4]
# Also recalculate equi-area shell assignment for all spots
dt.spots[, AreaShell.COG := 4]
dt.spots[r.cog < sqrt(0.8), AreaShell.COG := 3]
dt.spots[r.cog < sqrt(0.6), AreaShell.COG := 2]
dt.spots[r.cog < sqrt(0.4), AreaShell.COG := 1]
dt.spots[r.cog < sqrt(0.2), AreaShell.COG := 0]
dt.all[,Region := "Chr1 Set 1"]
dt.all[chr==1 & Probe1 %in% c(354,385,422, 463, 476, 360, 375, 389), Region :="Chr1 Set 2"]
dt.all[chr==17, Region := "Chr17"]
dt.all[chr==18, Region := "Chr18"]
dt.all[chr==4, Region := "Chr 4 Set 1"]
dt.all[chr==4 & Probe1 > 500, Region := "Chr4 Set 2"]
dt.all$Region <- factor(dt.all$Region, levels=c("Chr1 Set 1", "Chr1 Set 2", "Chr17", "Chr18", "Chr4 Set 1", "Chr4 Set 2"))
dt.all[,LAD_Stat := "Test"]
dt.all[chr==1 & Probe1 %in% c(52,23, 73, 120, 255,354,385,422), LAD_Stat := "LAD"]
dt.all[chr==1 & Probe1 %in% c(11,74,80, 249, 463,476), LAD_Stat := "Border"]
dt.all[chr==1 & Probe1 %in% c(88, 91, 232, 441,360,375, 389, 433), LAD_Stat := "Inter-LAD"]
dt.all[chr==17 & Probe1 %in% c(40, 133, 142, 146, 206, 263, 278), LAD_Stat := "LAD"]
dt.all[chr==17 & Probe1 %in% c(86, 121, 123, 156, 195, 256), LAD_Stat := "Inter-LAD"]
dt.all[chr==18 & Probe1 %in% c(157,163,167,296,123, 193, 303), LAD_Stat := "LAD"]
dt.all[chr==18 & Probe1 %in% c(141,186,250), LAD_Stat := "Border"]
dt.all[chr==18 & Probe1 %in% c(11,89), LAD_Stat := "Inter-LAD"]
dt.all[chr==4 & Probe1 %in% c(220, 221, 224, 226.5), LAD_Stat := "Border"]
dt.all[chr==4 & Probe1 %in% c(222, 223, 226, 227, 228), LAD_Stat := "LAD"]
dt.all[chr==4 & Probe1 %in% c(225, 225.5, 229, 231), LAD_Stat := "Inter-LAD"]
dt.all[chr==4 & Probe1 > 500 , LAD_Stat := "Inter-LAD"]
dt.all.filtered <- dt.all[Probe1PercDiploid > QC_thresh & Probe2PercDiploid > QC_thresh,]
dt.sameShells <- dt.all[AreaShell.spot1.COG==AreaShell.spot2.COG,]
Pull out data by allele for allele-independence plots:
dt.2spots <- dt.spots[SpotIndex %in% c(1,2) & PercDiploid > 0.6,]
dt.alleles <- dcast(dt.2spots, AssayIndex + nrow + ncol + color + FieldIndex + CellIndex +
chr + Probe + Cell_Type + Culture_Condition ~ SpotIndex, mean,
value.var="r.cog")
dt.alleles <- rename(dt.alleles, c(r.cog.1 = '1', r.cog.2 = '2'))
rm(dt.2spots)
Pull out data by green spot for clustering information:
dt.all.trips <- dcast(dt.sameShells[PercDiploidGreen>QC_thresh & PercDiploidRed>QC_thresh & PercDiploidFarRed > QC_thresh],
AssayIndex + nrow + ncol + Green_Probe + Red_Probe + FarRed_Probe + chr +
FieldIndex + CellIndex + SpotIndex1 + Spot1.r.cog ~ Channel2, min,
subset = .(Channel1 == "Green"), value.var = "SpotDist.micron")
dt.all.trips <- rename(dt.all.trips, c(SpotDist.Red = Red, SpotDist.FarRed = `Far Red`))
dt.all.trips <- dt.all.trips[is.finite(SpotDist.Red) & is.finite(SpotDist.FarRed),]
dt.all.trips[SpotDist.Red > 1 & SpotDist.FarRed > 1, Triplet:="A (none)"]
dt.all.trips[SpotDist.Red <= 1 & SpotDist.FarRed > 1, Triplet:="B (paired with red)"]
dt.all.trips[SpotDist.Red > 1 & SpotDist.FarRed <= 1, Triplet:="C (paired with far red)"]
dt.all.trips[SpotDist.Red <= 1 & SpotDist.FarRed <= 1, Triplet:="D (triplet)"]
Calculate coefficients of variation for groups by radial position bin:
dt.variabilities.bin1 <- dt.all[, list(StdDev.bybin1 = sd(SpotDist.micron),
CoV.bybin1 = sd(SpotDist.micron)/mean(SpotDist.micron),
N.bybin1 = .N),
by=list(Probe1, Probe2, AreaShell.spot1.COG)]
dt.variabilities.bin2 <- dt.all[, list(StdDev.bybin2 = sd(SpotDist.micron),
CoV.bybin2 = sd(SpotDist.micron)/mean(SpotDist.micron),
N.bybin2 = .N),
by=list(Probe1, Probe2, AreaShell.spot2.COG) ]
dt.counts <- dt.all.filtered[,list(All = .N,
"100nm" = sum(SpotDist.micron < 0.1),
"350nm" = sum(SpotDist.micron < 0.35),
"1um" = sum(SpotDist.micron < 1),
"Median" = median(SpotDist.micron)),
by = list(Cell_Type, chr, Region, Probe1, Probe2, Probe_Pair)]
dt.counts.positional <- dt.all.filtered[,list(All = .N,
"100nm" = sum(SpotDist.micron < 0.1),
"350nm" = sum(SpotDist.micron < 0.35),
"1um" = sum(SpotDist.micron < 1)),
by = list(Cell_Type, chr, Region, Probe1, Probe2, Probe_Pair, AreaShell.spot1.COG)]
dt.counts.positional.melted <- melt(dt.counts.positional, id.vars = c("chr", "Region", "Cell_Type", "Probe1", "Probe2", "Probe_Pair", "AreaShell.spot1.COG"),
measure.vars = c("All", "100nm", "350nm", "1um"))
dt.counts.positional.melted[,variable:=factor(variable, levels=c("All", "1um", "350nm", "100nm"))]
#Subsample the data to allow eas(ier) comparison between H1 and HFF: 1) Define which probe pairs are measured in H1s and HFFs (intersected_ProbePairs) 2) Pare down dt.all.filtered to include only those probe pairs 3) Subsample for 1000 rows with replacement
(But try to do it without saving the subset to save even a little bit of memory yikes)
H1_ProbePairs <- unique(dt.all.filtered[Cell_Type == "H1", Probe_Pair])
HFF_ProbePairs <- unique(dt.all.filtered[Cell_Type == "HFF", Probe_Pair])
intersected_ProbePairs <- intersect(H1_ProbePairs, HFF_ProbePairs)
dt.all.subsampled <- data.table(dt.all.filtered[Probe_Pair %in% intersected_ProbePairs] %>%
group_by(Probe_Pair, Cell_Type) %>%
sample_n(1000, replace=TRUE))
#Summarize data by median radial position & most common shell; add in bioinformatic data
First, calculate average radial position and most-common-shell for each gene on a per-well basis (with % diploid), and overall after filtering by % diploid
dt.loci <- dt.spots[, list(rad.pos.median = median(r.cog),
rad.pos.mean = mean(r.cog),
rad.shell.mode = which.max(tabulate(AreaShell.COG+1))-1,
count = .N),
by = list(AssayIndex, Cell_Type, Culture_Condition,
chr, Probe, color, PercDiploid)]
dt.loci.pooled <- dt.spots[PercDiploid > 0.6, list(rad.pos.median = median(r.cog),
rad.pos.mean = mean(r.cog),
rad.shell.mode = which.max(tabulate(AreaShell.COG+1))-1,
count = .N),
by = list(Cell_Type, chr, Probe)]
And synch up with the pooled probe level data:
dt.loci.pooled[, chr_formatted := paste0(c("chr", chr), collapse=""), by=list(chr, Probe)]
setkey(dt.sequencing, chr, probe)
setkey(dt.loci.pooled, chr_formatted, Probe)
dt.loci.pooled <- dt.loci.pooled[dt.sequencing]
dt.loci.pooled.HFF <- dt.loci.pooled[complete.cases(dt.loci.pooled) & Cell_Type == "HFF",]
dt.bed[,Genome_Position := (start+end)/2]
dt.spots[, chr_formatted := paste0(c("chr", chr), collapse=""), by=list(.id, AssayIndex, nrow, ncol, color,
FieldIndex, CellIndex, SpotIndex,
Cell_Type, Culture_Condition, PercDiploid,
chr)]
setkey(dt.bed, chr, probe)
setkey(dt.spots, chr_formatted, Probe)
dt.spots <- dt.spots[dt.bed, nomatch=0]
##’ Color-blind friendly palette ##’ From ##’ @title Color-blind friendly palette ##’ @param palette Choose “cb”, “rcb”, or “bly”. cb is the Winston ##’ Chang color blind palette; rcb is that palette in reverse; bly ##’ puts the yellow and blue in the palette first. ##’ @return Variations on an eight-color color-blind friendly palette. ##’ @author Winston Chang / Kieran Healy ##’ @export
dt.selected.distancecontrols <- dt.all.subsampled[Cell_Type == "HFF" & Probe_Pair %in% c("120.120", "611.611", "89.91", "73.74", "226.226.5", "614.614.5", "614.615", "609.614", "23.52", "52.120")]
dt.selected.distancecontrols$Probe_Pair <- with(dt.selected.distancecontrols, reorder(Probe_Pair, Genomic_Distance, median))
cumulDist <- ggplot(dt.selected.distancecontrols, mapping=aes(x=SpotDist.micron, group=Probe_Pair))
cumulDist + stat_ecdf(size=1, aes(color=log10((Genomic_Distance/1000)+1))) +
scale_color_viridis_c("Genomic Distance\n(kbp, log 10)") +
scale_x_continuous("2D Distance (micron)", limits=c(0,8)) +
scale_y_continuous("Density", breaks=c(0,1))
violbox <- ggplot(dt.selected.distancecontrols, mapping=aes(y=SpotDist.micron, x=Probe_Pair))
violbox + geom_jitter(width=0.3, alpha=0.2, aes(color=log10((Genomic_Distance/1000)+1))) +
geom_boxplot(width = 0.3) +
theme(legend.position="bottom")+
scale_color_viridis_c("Genomic Distance (kbp, log 10)") +
scale_x_discrete("Probe Pair") +
scale_y_continuous("2D Distance", limits=c(0,8))
cumulDist <- ggplot(dt.spots[Cell_Type == "HFF",], mapping=aes(x=r.cog, color=Genome_Position, group=Genome_Position))
cumulDist + stat_ecdf(size=1, aes(color=Genome_Position)) +
facet_wrap(. ~ chr) +
scale_color_viridis_c("Genomic Position") +
scale_x_continuous("Radial Position", limits=c(0,1)) +
scale_y_continuous("Density", breaks=c(0,1))
dt.loci.melted <- melt(dt.loci.pooled.HFF, id.vars = c("Cell_Type", "chr", "start", "end", "Probe", "rad.pos.median", "rad.pos.mean", "rad.shell.mode", "count", "chr_formatted", "locus_ID"), variable.name="Assay")
plot <- ggplot(dt.loci.melted[Assay %in% c("LaminB1", "Compartment", "H3K27me3", "H3K4me3"),],
mapping=aes(y=rad.pos.median, x=value, color=as.factor(rad.shell.mode)))
plot + geom_point(size=2) +
scale_color_viridis_d("Most Common\nEqui-Area\nShell") +
scale_y_continuous("Median Radial\nPosition", limits=c(0,1)) +
facet_grid(.~Assay, scales="free")+
scale_x_continuous("Normalized Signal")
scatter <- ggplot(data.table(dt.alleles[chr==1 & Probe %in% c(11,52,91,476),] %>%
group_by(Probe) %>%
sample_n(1000, replace=TRUE)),
mapping=aes(x=r.cog.1, y=r.cog.2))
scatter + stat_density2d_filled() +
facet_wrap(.~Probe, ncol=4) +
scale_fill_viridis_d("Density", guide=guide_colorsteps(barheight=6))+
scale_x_continuous("Radial Position (Homolog 1)", limits=c(-0.01,1.01)) +
scale_y_continuous("Radial Position\n(Homolog 2)", limits=c(-0.01,1.01))
scatter <- ggplot(data.table(dt.alleles[chr==1 & Probe %in% c(11,52,91,476),] %>%
group_by(Probe) %>%
sample_n(1000, replace=TRUE)),
mapping=aes(x=r.cog.1, y=r.cog.2))
scatter + stat_density2d_filled() +
facet_wrap(.~Probe, ncol=2) +
scale_fill_viridis_d("Density", guide=guide_colorsteps(barheight=10))+
scale_x_continuous("Radial Position (Allele 1)", limits=c(-0.01,1.01)) +
scale_y_continuous("Radial Position (Allele 2)", limits=c(-0.01,1.01))
scatter <- ggplot(data.table(dt.all.filtered[chr==4 & Probe1 == 609 & Probe2 %in% c(610, 611, 614, 616, 617, 618),] %>%
group_by(Probe2) %>%
sample_n(1000, replace=TRUE)),
mapping=aes(x=Spot1.r.cog, y=Spot2.r.cog))
scatter + stat_density2d_filled() +
facet_wrap(.~Probe2, ncol=3) +
scale_fill_viridis_d("Density", guide=guide_colorsteps(barheight=10))+
scale_x_continuous("Radial Position: Upstream Locus", limits=c(-0.01,1.01)) +
scale_y_continuous("Radial Position:\nDownstream Locus", limits=c(-0.01,1.01))
scatter <- ggplot(data.table(dt.all.filtered[chr==4 & Probe1 == 609 & Probe2 %in% c(610, 611, 614, 616, 617, 618),] %>%
group_by(Probe2) %>%
sample_n(1000, replace=TRUE)),
mapping=aes(x=Spot1.r.cog, y=Spot2.r.cog))
scatter + stat_density2d_filled() +
facet_wrap(.~Probe2, ncol=6) +
scale_fill_viridis_d("Density", guide=guide_colorsteps(barheight=6))+
scale_x_continuous("Radial Position: Upstream Locus", limits=c(-0.01,1.01), breaks=c(0,0.5,1)) +
scale_y_continuous("Radial Position:\nDownstream Locus", limits=c(-0.01,1.01), breaks=c(0,0.5,1))
cumulDist <- ggplot(dt.spots[Cell_Type == "HFF" & chr==4 & Probe > 300 & Probe != 613,],
mapping=aes(x=r.cog, group=Genome_Position))
cumulDist + stat_ecdf(size=1, aes(color=Distance_to_LAD/1000)) +
scale_color_viridis_c("Distance\nto LAD\n(kbp)", begin=1, end=0) +
scale_x_continuous("Radial Position", limits=c(0,1)) +
scale_y_continuous("Density")
cumulDist <- ggplot(dt.spots[Cell_Type == "HFF" & chr==17,],
mapping=aes(x=r.cog, group=Genome_Position))
cumulDist + stat_ecdf(size=1, aes(color=Distance_to_LAD/1000)) +
scale_color_viridis_c("Distance\nto LAD\n(kbp)", begin=1, end=0) +
scale_x_continuous("Radial Position", limits=c(0,1)) +
scale_y_continuous("Density")
Calculate correlations for heat maps
dt.all.subsampled <- data.table(dt.all.filtered %>%
group_by(Probe_Pair, Cell_Type) %>%
sample_n(1000, replace=TRUE))
testcorrelation <- function(dt){
spot1.p <- anova(lm(SpotDist.micron ~ Spot1.r.cog, data=dt))$`Pr(>F)`[1]
spot2.p <- anova(lm(SpotDist.micron ~ Spot2.r.cog, data=dt))$`Pr(>F)`[1]
spot1.m <- summary(lm(SpotDist.micron ~ Spot1.r.cog, data=dt))$coefficients[2]
spot2.m <- summary(lm(SpotDist.micron ~ Spot2.r.cog, data=dt))$coefficients[2]
data.table("Spot1.M" = spot1.m, "Spot2.M" = spot2.m,
"Spot1.P" = spot1.p, "Spot2.P" = spot2.p)
}
dt.lms <- data.table(ddply(dt.all.subsampled, .(chr, Probe1, Probe2, Cell_Type, Region), testcorrelation))
dt.lms[,`:=`(Spot1.P.Maxxed = max(Spot1.P, 0.00001),
Spot2.P.Maxxed = max(Spot2.P, 0.00001)),
by=list(chr, Probe1, Probe2, Cell_Type)]
scatter <- ggplot(dt.all.filtered[chr==1 & Probe1 == 52 & Probe2 %in% c(994, 91),],
mapping=aes(x=SpotDist.micron, y=Spot1.r.cog))
scatter + stat_density2d_filled() +
facet_wrap(.~Probe2, ncol=2) +
scale_fill_viridis_d("Density", guide=guide_colorsteps(barheight=6))+
scale_x_continuous("2D Distance (microns)", limits=c(-0.01,5), breaks=c(0:5)) +
scale_y_continuous("Radial Position:\nUpstream Locus", limits=c(-0.01,1.01), breaks=c(0,0.5,1))
scatter <- ggplot(dt.all.filtered[chr==1 & Probe1 == 52 & Probe2 %in% c(994, 91),],
mapping=aes(x=SpotDist.micron, y=Spot1.r.cog))
scatter + stat_density2d_filled() +
facet_wrap(.~Probe2, ncol=1) +
scale_fill_viridis_d("Density", guide=guide_colorsteps(barheight=10, barwidth=0.5))+
scale_x_continuous("2D Distance\n(microns)", limits=c(-0.01,5), breaks=c(0:5)) +
scale_y_continuous("Radial Position", limits=c(-0.01,1.01), breaks=c(0,0.5,1))
scatter <- ggplot(dt.all.filtered[chr==1 & Probe1 == 52 & Probe2 %in% c(73, 91),],
mapping=aes(x=SpotDist.micron, y=Spot1.r.cog))
scatter + stat_density2d_filled() +
facet_wrap(interaction(Cell_Type, Probe_Pair)~., nrow=1) +
scale_fill_viridis_d("Density", guide=guide_colorsteps(barheight=5, barwidth=0.5))+
scale_x_continuous("2D Distance (microns)", limits=c(-0.01,5), breaks=c(0:5)) +
scale_y_continuous("Radial Position", limits=c(-0.01,1.01), breaks=c(0,0.5,1))
Heat map: color is coefficient (directional), alpha is p-value (brighter = more significant)
plot <- ggplot(dt.lms[dt.lms$Cell_Type == "HFF" & chr==1,],
mapping=aes(x=as.factor(Probe1), y=as.factor(Probe2),
fill=Spot1.M, alpha=-log10(Spot1.P)))
plot + geom_tile() +
facet_wrap(.~chr, scales="free") +
scale_x_discrete("Upstream Probe") +
scale_y_discrete("Downstream Probe") +
scale_alpha_continuous("Sig.\n(log10)", limits=c(0,10))+
scale_fill_viridis_c("Direction", limits=c(-13.007, 13.007))
plot <- ggplot(dt.lms[dt.lms$Cell_Type == "HFF" & chr==4,],
mapping=aes(x=as.factor(Probe1), y=as.factor(Probe2),
fill=Spot1.M, alpha=-log10(Spot1.P)))
plot + geom_tile() +
facet_wrap(.~chr, scales="free") +
scale_x_discrete("Upstream Probe") +
scale_y_discrete("Downstream Probe") +
scale_alpha_continuous("Significance\nof correlation\n(log10)", limits=c(0,10))+
scale_fill_viridis_c("Direction\nof correlation", limits=c(-13.007, 13.007))
plot <- ggplot(dt.lms[dt.lms$Cell_Type == "HFF" & chr %in% c(17,18),],
mapping=aes(x=as.factor(Probe1), y=as.factor(Probe2),
fill=Spot1.M, alpha=-log10(Spot1.P)))
plot + geom_tile() +
facet_wrap(.~chr, scales="free", nrow=2) +
scale_x_discrete("Upstream Probe") +
scale_y_discrete("Downstream Probe") +
scale_alpha_continuous("Significance\nof correlation\n(log10)", limits=c(0,10))+
scale_fill_viridis_c("Direction\nof correlation", limits=c(-13.007, 13.007))
plot <- ggplot(dt.lms[dt.lms$Cell_Type == "HFF" & chr==1,],
mapping=aes(x=as.factor(Probe1), y=as.factor(Probe2),
fill=Spot2.M, alpha=-log10(Spot2.P)))
plot + geom_tile() +
facet_wrap(.~chr, scales="free") +
scale_x_discrete("Upstream Probe") +
scale_y_discrete("Downstream Probe") +
scale_alpha_continuous("Significance\nof correlation\n(log10)", limits=c(0,10))+
scale_fill_viridis_c("Direction\nof correlation", limits=c(-13.007, 13.007))
plot <- ggplot(dt.lms[dt.lms$Cell_Type == "HFF" & chr==4,],
mapping=aes(x=as.factor(Probe1), y=as.factor(Probe2),
fill=Spot2.M, alpha=-log10(Spot2.P)))
plot + geom_tile() +
facet_wrap(.~chr, scales="free") +
scale_x_discrete("Upstream Probe") +
scale_y_discrete("Downstream Probe") +
scale_alpha_continuous("Significance\nof correlation\n(log10)", limits=c(0,10))+
scale_fill_viridis_c("Direction\nof correlation", limits=c(-13.007, 13.007))
plot <- ggplot(dt.lms[dt.lms$Cell_Type == "HFF" & chr %in% c(17,18),],
mapping=aes(x=as.factor(Probe1), y=as.factor(Probe2),
fill=Spot2.M, alpha=-log10(Spot2.P)))
plot + geom_tile() +
facet_wrap(.~chr, scales="free", nrow=2) +
scale_x_discrete("Upstream Probe") +
scale_y_discrete("Downstream Probe") +
scale_alpha_continuous("Significance\nof correlation\n(log10)", limits=c(0,10))+
scale_fill_viridis_c("Direction\nof correlation", limits=c(-13.007, 13.007))
CDFs of three example pairs with three different patterns
plot <- ggplot(dt.all.filtered[Cell_Type == "HFF" & Probe_Pair %in% c("121.142", "86.123", "91.255"),],
mapping=aes(x=SpotDist.micron, color=as.factor(AreaShell.spot1.COG), group=AreaShell.spot1.COG))
plot + stat_ecdf(size=1) +
facet_wrap(. ~ Probe_Pair, nrow=3) +
theme(strip.text = element_blank())+
scale_color_viridis_d("Equi-Area\nShell (Bait)") +
scale_x_log10("2D Distance (microns; log10)", limits=c(0.05,20)) +
scale_y_continuous("Cumulative\nFrequency", breaks=c(0,1))
dt.all.filtered.coopvsadd <- melt(dt.all.filtered[Cell_Type=="HFF" & Probe_Pair %in% c("52.255", "52.89", "11.52")],
id.vars = c("AssayIndex", "nrow", "ncol", "FieldIndex", "CellIndex",
"SpotIndex1", "SpotIndex2", "Probe1", "Probe2", "Probe_Pair",
"Cell_Type", "Culture_Condition", "Region",
"SpotDist.micron", "SpotDist.Threshold", "SpotDist.nudged"),
measure.vars = c("Spot1.r.cog", "Spot2.r.cog"),
variable.name = "Locus",
value.name = "r.cog")
plot <- ggplot(dt.all.filtered.coopvsadd[SpotDist.Threshold != "D (100nm)",],
mapping=aes(x=r.cog, color=as.factor(SpotDist.Threshold), group=SpotDist.Threshold))
plot + stat_ecdf(size=1) +
facet_grid(Locus ~ Probe_Pair)+
scale_color_viridis_d("Equi-Area\nShell (Bait)") +
scale_x_continuous("2D Distance (microns; log10)", limits=c(0,1), breaks=c(0,1)) +
scale_y_continuous("Cumulative\nFrequency")
plot <- ggplot(dt.all.filtered.coopvsadd[SpotDist.Threshold != "D (100nm)",],
mapping=aes(x=r.cog,
linetype = as.factor(Locus),
color = as.factor(SpotDist.Threshold),
group=interaction(as.factor(Locus), as.factor(SpotDist.Threshold))))
plot + stat_ecdf(size=1) +
facet_grid(. ~ Probe_Pair)+
scale_color_viridis_d("Distance Threshold") +
scale_linetype("Locus for Radial Position") +
theme(legend.position="bottom", legend.box="vertical")+
scale_x_continuous("2D Distance (microns; log10)", limits=c(0,1), breaks=c(0,1)) +
scale_y_continuous("Cumulative\nFrequency")
plot <- ggplot(dt.all.filtered.coopvsadd[SpotDist.Threshold != "D (100nm)" & Probe_Pair == "11.52",],
mapping=aes(x=r.cog, color=as.factor(SpotDist.Threshold), group=SpotDist.Threshold))
plot + stat_ecdf(size=1) +
facet_grid(Locus ~ Probe_Pair)+
scale_color_viridis_d("2D Distance\nThreshold") +
scale_x_continuous("Radial Position", limits=c(0,1), breaks=c(0,1)) +
scale_y_continuous("Cumulative\nFrequency")
plot <- ggplot(dt.all.filtered.coopvsadd[SpotDist.Threshold != "D (100nm)" & Probe_Pair == "52.89",],
mapping=aes(x=r.cog, color=as.factor(SpotDist.Threshold), group=SpotDist.Threshold))
plot + stat_ecdf(size=1) +
facet_grid(Locus ~ Probe_Pair)+
scale_color_viridis_d("2D Distance\nThreshold") +
scale_x_continuous("Radial Position", limits=c(0,1), breaks=c(0,1)) +
scale_y_continuous("Cumulative\nFrequency")
plot <- ggplot(dt.all.filtered.coopvsadd[SpotDist.Threshold != "D (100nm)" & Probe_Pair == "52.255",],
mapping=aes(x=r.cog, color=as.factor(SpotDist.Threshold), group=SpotDist.Threshold))
plot + stat_ecdf(size=1) +
facet_grid(Locus ~ Probe_Pair)+
scale_color_viridis_d("2D Distance\nThreshold") +
scale_x_continuous("Radial Position", limits=c(0,1), breaks=c(0,1)) +
scale_y_continuous("Cumulative\nFrequency")
52-91-994 CDFs to show preference changes at different positions
dt.samp <- dt.all.filtered[chr==1 & Probe1 == 52 & Probe2 %in% c(91, 994) & AreaShell.spot1.COG < 5,]
plot <- ggplot(dt.samp, mapping=aes(x=SpotDist.micron, color=Probe2))
plot + stat_ecdf(size=1, aes(color=as.factor(Probe2))) +
facet_grid(AreaShell.spot1.COG~.) +
scale_color_viridis_d("Target\nProbe") +
scale_x_log10("2D Distance to LAD", limits=c(0.05,20)) +
scale_y_continuous("Cumulative Frequency", limits=c(0,1), breaks=c(0,1))
dt.samp <- dt.all.filtered[chr==1 & Probe1 == 52 & Probe2 %in% c(91, 120, 994) & AreaShell.spot1.COG < 5,]
plot <- ggplot(dt.samp, mapping=aes(x=SpotDist.micron, color=Probe2))
plot + stat_ecdf(size=1, aes(color=as.factor(Probe2))) +
facet_grid(.~AreaShell.spot1.COG) +
scale_color_viridis_d("Target\nProbe") +
scale_x_continuous("2D Distance to 52", limits=c(0.1,8.1)) +
scale_y_continuous("Cumulative\nFrequency", limits=c(0,1), breaks=c(0,1))
plot <- ggplot(dt.spots[Probe %in% c(11, 23, 52, 73, 74, 80, 89, 91, 120),],
mapping=aes(x=as.factor(Cell_Type), fill=as.factor(AreaShell.COG)))
plot + geom_bar(position="fill", stat="count") +
facet_grid(.~Probe, scales="free") +
scale_fill_viridis_d("Equi-Area\nShell") +
scale_x_discrete("Probe") +
scale_y_continuous("Proportion\nof Spots")
plot <- ggplot(dt.all.filtered[Probe_Pair %in% intersected_ProbePairs,],
mapping=aes(x=SpotDist.micron, color=Genomic_Distance, group=Probe_Pair))
plot + stat_ecdf(size=1) +
facet_grid(.~Cell_Type) +
scale_color_viridis_c("Genomic Distance") +
scale_x_continuous("2D Distance (microns)", limits=c(0,15), breaks=c(0,5,10,15)) +
scale_y_continuous("Cumulative\nFrequency", limits=c(0,1), breaks=c(0,0.5,1))
plot <- ggplot(dt.all.filtered[Probe_Pair %in% c("11.52", "23.52", "52.73"),],
mapping=aes(x=SpotDist.micron, color=Cell_Type))
plot + stat_ecdf(size=1) +
facet_wrap(Probe_Pair~., nrow=3) +
scale_color_viridis_d("Cell Type") +
scale_x_log10("2D Distance (microns)", limits=c(0.05,20), breaks=c(0.1,1,10)) +
scale_y_continuous("Cumulative\nFrequency", limits=c(0,1), breaks=c(0,0.5,1))
plot <- ggplot(dt.all.filtered[Probe_Pair %in% c("23.52","52.74"),],
mapping=aes(x=SpotDist.micron, color=Cell_Type))
plot + stat_ecdf(size=1) +
facet_wrap(Probe_Pair~., nrow=2) +
scale_color_viridis_d("Cell Type") +
scale_x_log10("2D Distance (microns)", limits=c(0.05,20), breaks=c(0.1,1,10)) +
scale_y_continuous("Cumulative\nFrequency", limits=c(0,1), breaks=c(0,0.5,1))
plot <- ggplot(dt.all.filtered[Probe_Pair %in% c("11.74", "11.52", "11.89"),],
mapping=aes(x=SpotDist.micron, color=as.factor(AreaShell.spot1.COG), group=as.factor(AreaShell.spot1.COG)))
plot + stat_ecdf(size=1) +
facet_grid(Cell_Type ~ Probe_Pair) +
scale_color_viridis_d("Equi-\nArea\nShell", position="bottom") +
scale_x_log10("2D Distance (microns)", limits=c(0.05,20), breaks=c(0.1,1,10)) +
scale_y_continuous("Cumulative\nFrequency", limits=c(0,1), breaks=c(0,0.5,1))
Heat map: color is coefficient (directional), alpha is p-value (brighter = more significant)
plot <- ggplot(dt.lms[dt.lms$Cell_Type == "H1",],
mapping=aes(x=as.factor(Probe1), y=as.factor(Probe2),
fill=Spot1.M, alpha=-log10(Spot1.P)))
plot + geom_tile() +
theme(legend.position="none")+
scale_x_discrete("Upstream Probe") +
scale_y_discrete("Downstream Probe") +
ggtitle("Upstream Spot") +
scale_alpha_continuous("Significance\n(log10)", limits=c(0,10))+
scale_fill_viridis_c("Slope", limits=c(-13.007, 13.007))
plot <- ggplot(dt.lms[dt.lms$Cell_Type == "H1",],
mapping=aes(x=as.factor(Probe1), y=as.factor(Probe2),
fill=Spot2.M, alpha=-log10(Spot2.P)))
plot + geom_tile() +
theme(legend.position="right", legend.box="horizontal")+
ggtitle("Downstream Spot") +
scale_x_discrete("Upstream Probe") +
scale_y_discrete("Downstream Probe") +
scale_alpha_continuous("Sig.\n(log10)", limits = c(0,10), guide=guide_legend(keyheight=1))+
scale_fill_viridis_c("Slope", limits=c(-13.007, 13.007), guide=guide_colorbar(barheight=7))
plot <- ggplot(dt.all.filtered[Cell_Type=="HFF" & !(chr == 18 & Probe1 == 89),],
mapping=aes(x=as.factor(SpotDist.Threshold), fill=as.factor(AreaShell.spot1.COG)))
plot + geom_bar(position="fill", stat="count") +
facet_wrap(.~interaction(chr,Probe1), nrow=6) +
theme(strip.background=element_blank(), strip.text=element_blank())+
scale_fill_viridis_d("Equi-Area\nShell") +
scale_x_discrete("Probe & Threshold") +
scale_y_continuous("Proportion of Spots")
plot <- ggplot(dt.all.subsampled[Probe_Pair %in% intersected_ProbePairs,
list("SD" = sd(SpotDist.micron), "M" = mean(SpotDist.micron),
"SD_M" = sd(SpotDist.micron)/mean(SpotDist.micron),
"error_min" = 0.96*sd(SpotDist.micron)/mean(SpotDist.micron),
"error_max" = 1.05*sd(SpotDist.micron)/mean(SpotDist.micron)),
by=list(Probe_Pair, Cell_Type)],
mapping=aes(x=Probe_Pair, fill=Cell_Type, y = SD_M))
plot + geom_bar(stat="identity", position="dodge", width=0.75) +
geom_errorbar(aes(ymin=error_min, ymax=error_max), position=position_dodge(0.75), width=0.2) +
scale_fill_viridis_d("Cell Type") +
scale_x_discrete("Probe Pair") +
scale_y_continuous("Coefficient of Variation")
plot <- ggplot(dt.all.subsampled[Probe_Pair %in% c("23.52", "52.73", "52.74", "23.80", "52.80", "52.120", "80.120"),
list("SD" = sd(SpotDist.micron), "M" = mean(SpotDist.micron),
"SD_M" = sd(SpotDist.micron)/mean(SpotDist.micron),
"error_min" = 0.96*sd(SpotDist.micron)/mean(SpotDist.micron),
"error_max" = 1.05*sd(SpotDist.micron)/mean(SpotDist.micron)),
by=list(Probe_Pair, Cell_Type)],
mapping=aes(x=Probe_Pair, fill=Cell_Type, y = SD_M))
plot + geom_bar(stat="identity", position="dodge", width=0.75) +
geom_errorbar(aes(ymin=error_min, ymax=error_max), position=position_dodge(0.75), width=0.2) +
scale_fill_viridis_d("Cell Type") +
scale_x_discrete("Probe Pair", ) +
scale_y_continuous("Coefficient of Variation") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
All radial positions as a bar graph:
plot <- ggplot(dt.spots[Cell_Type == "HFF",],
mapping=aes(x=as.factor(Probe), fill=as.factor(AreaShell.COG)))
plot + geom_bar(position="fill", stat="count") +
facet_wrap(.~chr, nrow=2, scales="free") +
scale_fill_brewer("Equi-Area\nShell", type="div", palette=2) +
scale_x_discrete("Probe", labels=NULL) +
scale_y_continuous("Proportion of Spots")
Individual rad pos vs sequencing:
plot <- ggplot(dt.loci.pooled.HFF,
mapping=aes(y=rad.pos.median, x=Compartment, color=as.factor(rad.shell.mode)))
plot + geom_point(size=2) +
scale_color_viridis_d("Most Common\nEqui-Area\nShell") +
scale_y_continuous("Median Radial\nPosition", limits=c(0,1)) +
scale_x_continuous("Compartment Score", limits=c(-2,1))
plot <- ggplot(dt.loci.pooled.HFF,
mapping=aes(y=rad.pos.median, x=H3K27me3, color=as.factor(rad.shell.mode)))
plot + geom_point(size=2) +
scale_color_viridis_d("Most Common\nEqui-Area\nShell") +
scale_y_continuous("Median Radial\nPosition", limits=c(0,1)) +
scale_x_continuous("H3K27me3 signal")
plot <- ggplot(dt.loci.pooled.HFF,
mapping=aes(y=rad.pos.median, x=H3K36me3, color=as.factor(rad.shell.mode)))
plot + geom_point(size=2) +
scale_color_viridis_d("Most Common\nEqui-Area\nShell") +
scale_y_continuous("Median Radial Position", limits=c(0,1)) +
scale_x_continuous("H3K36me3 signal", limits=c(0,3))
plot <- ggplot(dt.loci.pooled.HFF,
mapping=aes(y=rad.pos.median, x=H3K4me3, color=as.factor(rad.shell.mode)))
plot + geom_point(size=2) +
scale_color_viridis_d("Most Common\nEqui-Area\nShell") +
scale_y_continuous("Median Radial\nPosition", limits=c(0,1)) +
scale_x_continuous("H3K4me3 signal")
Document the information about the analysis session
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] data.table_1.12.2 reshape2_1.4.3 knitr_1.23 stringr_1.4.0
## [5] ggplot2_3.3.2 dplyr_1.0.2 plyr_1.8.4
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 magrittr_1.5 MASS_7.3-51.4
## [4] munsell_0.5.0 tidyselect_1.1.0 viridisLite_0.3.0
## [7] colorspace_1.4-1 R6_2.4.0 rlang_0.4.9
## [10] tools_3.6.0 grid_3.6.0 gtable_0.3.0
## [13] xfun_0.8 withr_2.1.2 htmltools_0.3.6
## [16] ellipsis_0.2.0.1 yaml_2.2.0 digest_0.6.19
## [19] tibble_3.0.4 lifecycle_0.2.0 crayon_1.3.4
## [22] RColorBrewer_1.1-2 purrr_0.3.4 vctrs_0.3.5
## [25] isoband_0.2.3 glue_1.4.2 evaluate_0.14
## [28] rmarkdown_1.13 labeling_0.3 stringi_1.4.3
## [31] compiler_3.6.0 pillar_1.4.7 scales_1.0.0
## [34] generics_0.1.0 pkgconfig_2.0.2